

/*
Procedure for generating synthetic data sets:
 1) indicator variables will equal 1 with probability = .5
 2) variables that were originally continuous or categorical will now all take 
    integer values from 0-100 with uniform probability
*/


********************************************************************************
* Generate Data_CBMortgages.dta
* This is a synthetic version of the data set of credit bureau mortgages - see 
* Table 1 in the paper for a description.
********************************************************************************

* Build the synthetic mortgage-level data set in memory (100,000 observations of
* independent random draws) and write it to Data_CBMortgages.dta.
clear
set obs 100000

* Fix the random-number seed so the draws made in this section are identical on
* every run of the do-file.
set seed 1001

* Indicator variables: equal to 1 with probability .5 and 0 otherwise.
* The comparison evaluates to 0/1 directly, so no follow-up replace is needed.
foreach v in matched ex_loan_homepurchase ex_loan_conventional ex_loan_fha ex_loan_va ex_loan_fannie ex_loan_freddie l1_d_havedebt_mta {
	gen double `v' = (runiform() > .5)
}

* All other variables: integers from 0 to 100, every value equally likely.
* runiformint() replaces floor(runiform(0,100.9999)), which gave the value 100 a
* slightly smaller probability than the values 0-99.
foreach v in countyyearid ex_loan_amount_exexact l1_vantage_v3_score age l1_all_balance_open l1_all_pastdue2 l1_aua_balance_open {
	gen double `v' = runiformint(0,100)
}

* Variable labels; the \textsubscript markup is stored verbatim in the label text.
la var ex_loan_conventional "Conventional Loan"
la var ex_loan_fha "FHA Loan"
la var ex_loan_va "VA Loan"
la var ex_loan_fannie "Fannie Mae"
la var ex_loan_freddie "Freddie Mac"
la var ex_loan_amount_exexact "Loan Amount"

la var l1_vantage_v3_score "Credit Score \textsubscript{t-1}"
la var age "Age"
la var l1_d_havedebt_mta "Have Mortgage \textsubscript{t-1}"
la var l1_all_balance_open "Total Debt \textsubscript{t-1}"
la var l1_all_pastdue2 "Past Due Debt \textsubscript{t-1}"
la var l1_aua_balance_open "Auto Debt \textsubscript{t-1}"

save Data_CBMortgages.dta, replace




********************************************************************************
* Generate Data_CBAllPersonYears.dta
* This is a synthetic version of the full 1% credit bureau panel. In reality it has 
* millions of observations. Here, it has 260,000 (about 20k/year on average for 2005-2017, since years are drawn at random) to illustrate
* that it is larger than the sample of mortgagors matched to HMDA (who are a subset of this).
********************************************************************************
* Build the synthetic person-year panel in memory (260,000 observations of
* independent random draws) and write it to Data_CBAllPersonYears.dta.
clear
set obs 260000

* Fix the random-number seed so the draws made in this section are identical on
* every run of the do-file.
set seed 1002

* Calendar year: integer 2005-2017, every year equally likely.
* runiformint() replaces floor(runiform(a,b.9999)), which made the top value
* slightly less likely than the others.
gen year = runiformint(2005,2017)

gen race = runiformint(1,3) // 1=white, 2=hispanic, 3=black

* Auto-credit outcome built from a single uniform draw z:
*   z in [.5,1)   -> missing (no search for auto credit)
*   z in [.25,.5) -> 0       (searched, no loan obtained)
*   z in (0,.25)  -> 1       (searched, loan obtained)
gen z = runiform(0,1)
gen aua_accessratio_annual_w0 = . // this will be an indicator for obtaining an auto loan during the year (missing when people don't search)
replace aua_accessratio_annual_w0 = 0 if z<.5 // for example, say 50% of person-years include a search for auto credit
replace aua_accessratio_annual_w0 = 1 if z<.25 // for example, say approval rate is 50%
drop z

* Indicator variables: equal to 1 with probability .5 and 0 otherwise.
foreach v in matched l1_d_havedebt_mta {
	gen double `v' = (runiform() > .5)
}

* All other variables: integers from 0 to 100, every value equally likely.
foreach v in l1_vantage_v3_score age l1_all_balance_open l1_all_pastdue2 l1_aua_balance_open hmda_app_income new_dti {
	gen double `v' = runiformint(0,100)
}

*these vars are only available in the hmda-matched subsample
replace race = . if matched==0
replace hmda_app_income = . if matched==0
replace new_dti = . if matched==0

* Variable labels; the \textsubscript markup is stored verbatim in the label text.
la var aua_accessratio_annual_w0 "Credit Approval (Auto)"
la var l1_vantage_v3_score "Credit Score \textsubscript{t-1}"
la var age "Age"
la var l1_d_havedebt_mta "Have Mortgage \textsubscript{t-1}"
la var l1_all_balance_open "Total Debt \textsubscript{t-1}"
la var l1_all_pastdue2 "Past Due Debt \textsubscript{t-1}"
la var l1_aua_balance_open "Auto Debt \textsubscript{t-1}"
la var hmda_app_income "Income"
la var new_dti "Debt to Income \textsubscript{t-1}"

save Data_CBAllPersonYears.dta, replace




********************************************************************************
* Generate Data_MainMatchedPanel.dta
* This is a synthetic version of the main credit bureau/HMDA matched panel. See
* Section 2.3 in the paper for details.
********************************************************************************
* Build the synthetic matched panel: load the person-year file, keep the
* matched rows, add randomly drawn covariates and the interaction terms, and
* write the result to Data_MainMatchedPanel.dta.
use Data_CBAllPersonYears.dta, clear
keep if matched==1

* Fix the random-number seed so the draws made in this section are identical on
* every run (given the same input file).
set seed 1003

* Race indicators. race is expected to be nonmissing for the matched rows kept
* above; the !missing() guard is there because Stata treats a missing value as
* larger than any number, so a bare race>1 would flag a missing race as minority.
gen race_minority = (race > 1) if !missing(race)
gen race_hispanic = (race == 2) if !missing(race)

*gen additional indicators (1 with probability .5, 0 otherwise)
foreach v in sex_female l1_subprime {
	gen double `v' = (runiform() > .5)
}

*gen additional vars (integers 0-100, every value equally likely)
* runiformint() replaces floor(runiform(0,100.9999)), which gave the value 100 a
* slightly smaller probability than the values 0-99.
foreach v in ln_hmda_app_income ln_l1_all_balance_open ln_l1_all_pastdue2 ln_acs_pipc ln_popdensity acs_edu_bagrad acs_commute_usecar {
	gen double `v' = runiformint(0,100)
}

*gen FE vars - reduce # of possible values to avoid creating singletons in the smaller example data sets
* (integers 0-10, every value equally likely)
foreach v in timetomort statefips {
	gen double `v' = runiformint(0,10)
}

la var race_minority "Minority"
la var race_hispanic "Hispanic"
la var sex_female "Female"
la var ln_hmda_app_income "log(Income)"
la var ln_l1_all_balance_open "log(Total Debt \textsubscript{t-1})"
la var ln_l1_all_pastdue2 "log(Past Due Debt \textsubscript{t-1})"
la var ln_acs_pipc "log(Personal Income Per Capita)"
la var ln_popdensity "log(Population Density)"
la var acs_edu_bagrad "Bachelors Degree"
la var acs_commute_usecar "Commute Using Car"

*gen cross-sectional interactions (each split variable is 1 with probability .5)
foreach v in racialbias_hi lowcomp lowdealercomp nonbank_hi rural {
	gen double `v' = (runiform() > .5)
}

la var racialbias_hi "High Racial Bias State"
gen race_minorityXbias = race_minority * racialbias_hi
la var race_minorityXbias "Minority X High Racial Bias State"

la var lowcomp "Low Banking Competition"
gen race_minorityXlowcomp = race_minority * lowcomp
la var race_minorityXlowcomp "Minority X Low Banking Competition"

la var lowdealercomp "Low Dealer Competition"
gen race_minorityXlowdealercomp = race_minority * lowdealercomp
la var race_minorityXlowdealercomp "Minority X Low Dealer Competition"

la var nonbank_hi "High Non-Bank Financing"
gen race_minorityXnonbank_hi = race_minority * nonbank_hi
la var race_minorityXnonbank_hi "Minority X High Non-Bank Financing"

la var rural "Rural"
gen race_minorityXrural = race_minority * rural
la var race_minorityXrural "Minority X Rural"

*gen credit card vars
* Single uniform draw z: z>=.5 -> missing (no search), .25<=z<.5 -> 0, z<.25 -> 1.
gen z = runiform(0,1)
gen bcc_accessratio_annual_w0 = . // this will be an indicator for obtaining a new CC during the year (missing when people don't search)
replace bcc_accessratio_annual_w0 = 0 if z<.5 // for example, say 50% of person-years include a search for CC credit
replace bcc_accessratio_annual_w0 = 1 if z<.25 // for example, say approval rate is 50%
la var bcc_accessratio_annual_w0 "Credit Approval (CC)"
drop z

* Limit increase is an integer 0-100 draw, then forced to 0 for every row whose
* approval indicator is not 1 (this includes rows where it is missing).
gen double bcc_credit_inc = runiformint(0,100)
replace bcc_credit_inc = 0 if bcc_accessratio_annual_w0 != 1
la var bcc_credit_inc "CC Limit Increase"


***** Generate vars for the CFPB tests in Table 10
* post = 1 for 2014 onward; guarded so a missing year is not treated as >= 2014.
gen post = (year >= 2014) if !missing(year)
la var post "Post"

*interactions for nonbankshare split triple diff
gen minXhighXpost = race_minority * nonbank_hi * post
la var minXhighXpost "Minority X Post X High Non-Bank Financing"

gen minXhigh = race_minority * nonbank_hi
la var minXhigh "Minority X High Non-Bank Financing"

gen minXpost = race_minority * post
la var minXpost "Minority X Post"

gen highXpost = nonbank_hi * post
la var highXpost "Post X High Non-Bank Financing"

*remaining interactions for racial bias split triple diff
gen minXpostXbias = race_minority * post * racialbias_hi
la var minXpostXbias "Minority X Post X High Racial Bias State"

gen minXbias = race_minority * racialbias_hi
la var minXbias  "Minority X High Racial Bias State"

gen postXbias = post * racialbias_hi
la var postXbias "Post X High Racial Bias State"

save Data_MainMatchedPanel, replace




********************************************************************************
* Generate Data_AutoLoans.dta
* This is a synthetic version of the data set of auto loans from the credit bureau/HMDA
* matched panel. See Section 3.2 in the paper for a description.
********************************************************************************
* Build the synthetic auto-loan data set: load the matched panel, keep the
* person-years with an auto loan from 2011 onward, add randomly drawn loan
* variables, and write the result to Data_AutoLoans.dta.
use Data_MainMatchedPanel, clear

keep if aua_accessratio_annual_w0==1 & year >=2011
*   ^this is illustrative, see Sections 3.2 and 3.3.2 in the paper for a detailed
*   description of the filters used in the interest rate and default analyses.

* Fix the random-number seed so the draws made in this section are identical on
* every run (given the same input file).
set seed 1004

*gen additional indicators (1 with probability .5, 0 otherwise)
foreach v in d_delinqf2_aua {
	gen double `v' = (runiform() > .5)
}

*gen additional vars (integers 0-100, every value equally likely)
* runiformint() replaces floor(runiform(0,100.9999)), which gave the value 100 a
* slightly smaller probability than the values 0-99.
foreach v in apr aua_credit_open ln_aua_credit_open lti_aua aua_debtshare_all {
	gen double `v' = runiformint(0,100)
}

*gen FE vars - reduce # of possible values to avoid creating singletons in the smaller example data sets
* (integers 0-10, every value equally likely)
foreach v in aua_termofrecenttrade origmonth {
	gen double `v' = runiformint(0,10)
}

la var d_delinqf2_aua "Auto Loan Default"
la var apr "Auto Loan Rate"
la var aua_credit_open "Auto Loan Amount"
la var ln_aua_credit_open "Log(Auto Loan Amount)"
la var lti_aua "Auto Loan to Income Ratio"
la var aua_debtshare_all "Auto Debt Share"
la var aua_termofrecenttrade "Auto Loan Term"
la var origmonth "Origination Month"

save Data_AutoLoans.dta, replace











